We will use Gapminder Population Data version 7. It can be downloaded
from here. Download this MS Excel
file and save it in the data directory of this repo as
population_data.xlsx.
Note: Run these commands manually in the R console
# One-time setup: install every package the walkthrough attaches explicitly.
install.packages(
  c("rnaturalearth", "stringr", "readxl", "dplyr", "ggplot2", "plotly")
)
library(rnaturalearth)
library(stringr)
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(scales)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Country polygons from Natural Earth, returned as an sf data frame.
world_map <- ne_countries(returnclass = "sf")
Note: the required data is in the 4th sheet of the MS Excel file.
# Read the population table from sheet 4 of the workbook, then list its columns.
world_pop <- read_xlsx(path = "data/population_data.xlsx", sheet = 4)
names(world_pop)
## [1] "geo" "name" "time" "Population"
# Map names that have no exactly matching `name` in the population table.
setdiff(world_map[["sovereignt"]], world_pop[["name"]])
## [1] "Antarctica" "The Bahamas"
## [3] "Ivory Coast" "Democratic Republic of the Congo"
## [5] "Republic of Congo" "Northern Cyprus"
## [7] "Guinea Bissau" "Kyrgyzstan"
## [9] "Kosovo" "Laos"
## [11] "Macedonia" "United States of America"
## [13] "Western Sahara" "Somaliland"
## [15] "Republic of Serbia" "Slovakia"
## [17] "East Timor" "United Republic of Tanzania"
# Add a `sovereignt` column that holds each country's name as it is spelled in
# `world_map$sovereignt`, so the two tables can be joined on that column.
# Any name not listed below is copied over from `name` unchanged.
world_pop <- world_pop %>%
  mutate(
    sovereignt = case_when(
      name == "Tanzania" ~ "United Republic of Tanzania",
      name == "United States" ~ "United States of America",
      name == "Congo, Dem. Rep." ~ "Democratic Republic of the Congo",
      name == "Bahamas" ~ "The Bahamas",
      name == "Serbia" ~ "Republic of Serbia",
      name == "Macedonia, FYR" ~ "Macedonia",
      name == "Slovak Republic" ~ "Slovakia",
      name == "Congo, Rep." ~ "Republic of Congo",
      name == "Kyrgyz Republic" ~ "Kyrgyzstan",
      name == "Lao" ~ "Laos",
      name == "Cote d'Ivoire" ~ "Ivory Coast",
      name == "Timor-Leste" ~ "East Timor",
      name == "Guinea-Bissau" ~ "Guinea Bissau",
      TRUE ~ name
    )
  )
Now we can join our two datasets, pick the columns we want for visualizing and remove missing values.
# Attach the population rows to the map polygons via the shared `sovereignt`
# key, keep only the columns used for plotting, and drop incomplete rows.
world_data <- world_map %>%
  inner_join(world_pop, by = "sovereignt") %>%
  select(geo, sovereignt, Population, time, geometry) %>%
  na.omit()
head(world_data)
## Simple feature collection with 6 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 60.52843 ymin: 29.31857 xmax: 75.15803 ymax: 38.48628
## CRS: +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## geo sovereignt Population time geometry
## 1 afg Afghanistan 3280000 1800 MULTIPOLYGON (((61.21082 35...
## 2 afg Afghanistan 3280000 1801 MULTIPOLYGON (((61.21082 35...
## 3 afg Afghanistan 3280000 1802 MULTIPOLYGON (((61.21082 35...
## 4 afg Afghanistan 3280000 1803 MULTIPOLYGON (((61.21082 35...
## 5 afg Afghanistan 3280000 1804 MULTIPOLYGON (((61.21082 35...
## 6 afg Afghanistan 3280000 1805 MULTIPOLYGON (((61.21082 35...
We only visualize the data at 20-year intervals (1800, 1820, …, 2100), so we drop the rows for all other years.
# Keep one snapshot every 20 years: 1800, 1820, ..., 2100.
world_data <- filter(
  world_data,
  time %in% seq(from = 1800, to = 2100, by = 20)
)
head(world_data)
## Simple feature collection with 6 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 60.52843 ymin: 29.31857 xmax: 75.15803 ymax: 38.48628
## CRS: +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## geo sovereignt Population time geometry
## 1 afg Afghanistan 3280000 1800 MULTIPOLYGON (((61.21082 35...
## 2 afg Afghanistan 3288817 1820 MULTIPOLYGON (((61.21082 35...
## 3 afg Afghanistan 3586362 1840 MULTIPOLYGON (((61.21082 35...
## 4 afg Afghanistan 3922032 1860 MULTIPOLYGON (((61.21082 35...
## 5 afg Afghanistan 4288021 1880 MULTIPOLYGON (((61.21082 35...
## 6 afg Afghanistan 4707744 1900 MULTIPOLYGON (((61.21082 35...
# Interactive choropleth of population per country. The `frame` mapping is
# passed through to ggplotly(), which uses it to animate the map over `time`.
ggplotly(
  ggplot(
    world_data,
    aes(geometry = geometry, frame = time)
  ) +
    geom_sf(aes(fill = Population)) +
    theme_void() +
    ggtitle("World Population Prediction 1800 - 2100") +
    scale_fill_distiller(
      palette = "RdBu",
      # `labels` is spelled out in full rather than relying on R's partial
      # argument matching; legend values are shown in millions ("M").
      labels = number_format(scale = 1e-6, suffix = "M"),
      # Fixed 0 to 2 billion colour range shared by every frame.
      limits = c(0, 2e9)
    )
)